Summary

In this document we explore and fit a model (using Bayesian methods) that describes the weight-dependent intake rate relationship for salmon. The model includes competitive effects and intrinsic fish variation.

This model will later be incorporated into a subsequent model that tries to predict fish growth.


library(tidyverse) # data frame functionality
library(cowplot)   # create multi-plots
library(rstan)
library(bayesplot)

rm(list = ls()) # clear memory

# read in data and wrangle

# group feed consumption data
df_group <- read_csv("100S_feed_provided.csv")

df_group$tank   <- factor(df_group$tank)

# individual feed consumption data
df_indiv <- read_csv("100S_fish_growth.csv") %>%   
  select(c(2,3,4,5,7))

# set up individual data frame
df_indiv$ID   <- factor(df_indiv$ID)
df_indiv$tank <- factor(df_indiv$tank)
df_indiv$days[which(df_indiv$days == 276)] <- 274

# add appropriate mean weights
df_indiv <- df_indiv %>%
    left_join(select(df_group, tank, days, w_bar), by = c("days", "tank"))

# only keep data where intake is known and non-zero
df_indiv <- df_indiv %>%
    filter(!is.na(intake), intake > 0)
p1 <- ggplot(df_indiv) +
    geom_point(aes(x = w, y = intake, color = tank)) +
    labs(x = "Weight (g)", y = "Intake (g/ind.)") +
    theme_bw()

p2 <- ggplot(df_indiv) +
    geom_point(aes(x = w, y = intake, color = tank)) +
    labs(x = "Weight (g)", y = "Intake (g/ind.)") +
    facet_wrap( ~ tank) + 
    theme_bw()

plot_grid(p1, p2, ncol = 1)

p1 <- ggplot(df_indiv) +
    geom_point(aes(x = w, y = intake, color = tank)) +
    labs(x = "Weight (g)", y = "Intake (g/ind.)") +
    scale_x_log10() + scale_y_log10() +
    theme_bw()

p2 <- ggplot(df_indiv) +
    geom_point(aes(x = w, y = intake, color = tank), alpha = 0.1) +
    labs(x = "Weight (g)", y = "Intake (g/ind.)") +
    facet_wrap( ~ tank) + 
    scale_x_log10() + scale_y_log10() +
    theme_bw()

plot_grid(p1, p2, ncol = 1)

ggplot(df_indiv) +
    geom_point(aes(x = w, y = intake, color = (w - w_bar)/w_bar)) +
    labs(x = "Weight (g)", y = "Intake (g/ind.)", 
    color = "Relative\nweight") +
    scale_x_log10() + scale_y_log10() +
    scale_color_gradient2(low = "red", mid = "white", high = "blue", midpoint = 0) +
    theme_bw() +
    theme(
    panel.background = element_rect(fill = "grey10"),
    panel.grid = element_line(colour = "grey35") 
  )

df_indiv <- df_indiv %>%
    mutate(ln_w = log(w))
df_indiv$ln_w_bin <- cut(df_indiv$ln_w, breaks = 8)

df_summary <- df_indiv %>%
  group_by(ln_w_bin, tank) %>%
    summarise(.groups = "drop",
        n = n(), 
        median_w = median(w),
        intake_mean = mean(intake),
        intake_var  = var(intake),
        intake_100 = quantile(intake, probs = 0.1),
        intake_900 = quantile(intake, probs = 0.9),
        cov = sd(intake)/intake_mean) %>%
    filter(n >= 3)
ggplot(df_summary) +
    geom_ribbon(aes(x = median_w, ymin = intake_100, ymax = intake_900, 
    fill = tank)) +
    geom_point(aes(x = median_w, y = intake_mean)) +
    labs(x = "Mean weight (g)", y = "Mean intake (g/ind.)") +
    facet_wrap( ~ tank) +
    scale_x_log10() + scale_y_log10() +
    theme_bw()

p1 <- ggplot(df_summary) +
    geom_point(aes(x = median_w, y = cov, color = tank)) +
    labs(x = "Weight (g)", y = "Coefficient of variation") +
    theme_bw()
    
p2 <- ggplot(df_summary) +
    geom_point(aes(x = intake_mean, y = cov, color = tank)) +
    labs(x = "Mean intake (g/ind.)", y = "Coefficient of variation") +
    theme_bw()

plot_grid(p1,p2)

ggplot(df_indiv) +
    geom_histogram(aes(x = intake), 
        binwidth = 2, fill = "lightblue", color = "blue") +
    facet_grid(tank ~ ln_w_bin) +
    labs(x = "Mean intake (g/ind.)") +
    theme_bw()

df_growth <- df_indiv %>%
    select(ID, tank, days, w, w_bar) %>%
    na.omit() %>% 
    mutate(weight_ratio = w / w_bar) %>%
    arrange(tank, ID, days)

df_growth$dt         <- NA
df_growth$delta_w_dt <- NA
df_growth$delta_w_1  <- NA

current_id <- df_growth$ID[1]
for (j in 2:nrow(df_growth)) {
  if (df_growth$ID[j] == current_id) { # prior data is present
    df_growth$dt[j-1]         <- df_growth$days[j] - df_growth$days[j-1]
    df_growth$delta_w_dt[j-1] <- df_growth$w[j] - df_growth$w[j-1]
    df_growth$delta_w_1[j-1]  <- df_growth$delta_w_dt[j-1] / df_growth$dt[j-1]
  }
  current_id <- df_growth$ID[j] 
}
df_plot <- df_growth %>%
    select(tank, ID, days, w, weight_ratio, delta_w_1) %>%
    arrange(tank, ID, days) %>%
    na.omit()

ggplot(df_plot) +
    geom_hline(yintercept = 0, color = "white") +
    geom_point(aes(x = w, y = delta_w_1, color = weight_ratio)) +
    scale_colour_gradient2(
    low = "red",
    mid = "grey85",
    high = "blue",
    midpoint = 1
  ) +
  facet_wrap( ~ tank) +
    scale_x_log10() +
    labs(x = "Weight (g)", y = "Estimated daily changein weigfht (g)") +
    theme_bw() +
  theme(
    panel.background = element_rect(fill = "grey10"),
    panel.grid = element_line(colour = "grey35") 
  )

df_intake_group <- df_group %>%
  mutate(intake_ind = feed_consumed_g / fish_no, fish_no_F = factor(fish_no))

df_intake_indiv <- df_indiv %>%
    select(ID, tank, days, w, intake) %>%
    na.omit() %>%
    filter(intake > 0)

ggplot() +
    geom_point(data = df_intake_indiv,
   aes(x = w, y = intake), color = "blue", alpha = 1) +
    geom_point(data = df_intake_group,
    aes(x = w_bar, y = intake_ind), color = "red", alpha = 1) +
  labs(
    x = "Weight (g)", y = "Intake (g/ind.)", subtitle = "Red = group, blue = individual") +
    scale_x_log10() +
    scale_y_log10() +
    facet_wrap( ~ tank) +
    theme_bw()

Bayesian Fit

Relationships: * a power-law relationship between mass and intake * a power-law relationship between mass and the coefficient of variation of intake
Variation: * gamma distribution of variation in observed intake from the predicted mean * between-individuals * variation in expected intake among fish is described by a log-normal * between-samples * variation in expected intake among samples is described by a log-normal

# prepare the data (stan wants a list of data not a data frame)
df_fit <- df_indiv %>%
  select(ID, tank, days, w, w_bar, intake) %>%
  filter(intake > 0) %>%
  na.omit() %>%
  mutate(
    w_rel  = (w - w_bar)/w_bar,
    j      = as.integer(factor(ID)),
    sample = factor(paste(tank, "_", days, sep = "")),
    k      = as.integer(sample))

I <- nrow(df_fit)  # number of observations
J <- max(df_fit$j) # number of fish
K <- max(df_fit$k) # number of fish

w_std <- 600.0 # standard weight (a1 = intake, cov = a2)

stan_dat <- list(
  I      = I, # number of observations
  J      = J, # number of fish
  K      = K, # number of samples
  w_std  = 600.0, # standardised fish weight (should be a typical weight)  
  j      = df_fit$j,      # fish 
  k      = df_fit$k,      # sample
  w      = df_fit$w,      # weight
  w_rel  = df_fit$w_rel,  # relative weight
  Intake = df_fit$intake) # intake
# fit the model!
fit_n <- stan(file = 'IntakeFit.stan', data = stan_dat,
  iter = 500, warmup = 250, chains = 4, refresh = 50, seed = 42)
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## 
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.001505 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 15.05 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:   1 / 500 [  0%]  (Warmup)
## Chain 1: Iteration:  50 / 500 [ 10%]  (Warmup)
## Chain 1: Iteration: 100 / 500 [ 20%]  (Warmup)
## Chain 1: Iteration: 150 / 500 [ 30%]  (Warmup)
## Chain 1: Iteration: 200 / 500 [ 40%]  (Warmup)
## Chain 1: Iteration: 250 / 500 [ 50%]  (Warmup)
## Chain 1: Iteration: 251 / 500 [ 50%]  (Sampling)
## Chain 1: Iteration: 300 / 500 [ 60%]  (Sampling)
## Chain 1: Iteration: 350 / 500 [ 70%]  (Sampling)
## Chain 1: Iteration: 400 / 500 [ 80%]  (Sampling)
## Chain 1: Iteration: 450 / 500 [ 90%]  (Sampling)
## Chain 1: Iteration: 500 / 500 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 8.74224 seconds (Warm-up)
## Chain 1:                21.2864 seconds (Sampling)
## Chain 1:                30.0286 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.000771 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 7.71 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:   1 / 500 [  0%]  (Warmup)
## Chain 2: Iteration:  50 / 500 [ 10%]  (Warmup)
## Chain 2: Iteration: 100 / 500 [ 20%]  (Warmup)
## Chain 2: Iteration: 150 / 500 [ 30%]  (Warmup)
## Chain 2: Iteration: 200 / 500 [ 40%]  (Warmup)
## Chain 2: Iteration: 250 / 500 [ 50%]  (Warmup)
## Chain 2: Iteration: 251 / 500 [ 50%]  (Sampling)
## Chain 2: Iteration: 300 / 500 [ 60%]  (Sampling)
## Chain 2: Iteration: 350 / 500 [ 70%]  (Sampling)
## Chain 2: Iteration: 400 / 500 [ 80%]  (Sampling)
## Chain 2: Iteration: 450 / 500 [ 90%]  (Sampling)
## Chain 2: Iteration: 500 / 500 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 6.36567 seconds (Warm-up)
## Chain 2:                5.09602 seconds (Sampling)
## Chain 2:                11.4617 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.00077 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 7.7 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:   1 / 500 [  0%]  (Warmup)
## Chain 3: Iteration:  50 / 500 [ 10%]  (Warmup)
## Chain 3: Iteration: 100 / 500 [ 20%]  (Warmup)
## Chain 3: Iteration: 150 / 500 [ 30%]  (Warmup)
## Chain 3: Iteration: 200 / 500 [ 40%]  (Warmup)
## Chain 3: Iteration: 250 / 500 [ 50%]  (Warmup)
## Chain 3: Iteration: 251 / 500 [ 50%]  (Sampling)
## Chain 3: Iteration: 300 / 500 [ 60%]  (Sampling)
## Chain 3: Iteration: 350 / 500 [ 70%]  (Sampling)
## Chain 3: Iteration: 400 / 500 [ 80%]  (Sampling)
## Chain 3: Iteration: 450 / 500 [ 90%]  (Sampling)
## Chain 3: Iteration: 500 / 500 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 7.44511 seconds (Warm-up)
## Chain 3:                4.78214 seconds (Sampling)
## Chain 3:                12.2273 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'IntakeFit' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.000765 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 7.65 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:   1 / 500 [  0%]  (Warmup)
## Chain 4: Iteration:  50 / 500 [ 10%]  (Warmup)
## Chain 4: Iteration: 100 / 500 [ 20%]  (Warmup)
## Chain 4: Iteration: 150 / 500 [ 30%]  (Warmup)
## Chain 4: Iteration: 200 / 500 [ 40%]  (Warmup)
## Chain 4: Iteration: 250 / 500 [ 50%]  (Warmup)
## Chain 4: Iteration: 251 / 500 [ 50%]  (Sampling)
## Chain 4: Iteration: 300 / 500 [ 60%]  (Sampling)
## Chain 4: Iteration: 350 / 500 [ 70%]  (Sampling)
## Chain 4: Iteration: 400 / 500 [ 80%]  (Sampling)
## Chain 4: Iteration: 450 / 500 [ 90%]  (Sampling)
## Chain 4: Iteration: 500 / 500 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 5.95893 seconds (Warm-up)
## Chain 4:                5.4562 seconds (Sampling)
## Chain 4:                11.4151 seconds (Total)
## Chain 4:
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## https://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 1.31, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## https://mc-stan.org/misc/warnings.html#tail-ess
# check out some predicted model parameters and log-likelihood
model_par <- c("a1", "b1", "c1", "a2", "b2", "sigma_j", "sigma_k")

# check for chain convergence
rstan::traceplot(object = fit_n, pars = model_par, inc_warmup = TRUE, 
  ncol = 3)

mcmc_hist(fit_n, pars = model_par) # show histograms of posterior parameter estimates

l_par <- rstan::extract(fit_n, pars = c("a1", "b1", "c1", "a2", "b2")) 

df_predict <- tibble(
  w = seq(from = min(df_fit$w), to = max(df_fit$w), length.out = 100)
) %>%
  mutate(I_025 = 0.0, I_500 = 0.0, I_975 = 0.0, cv_025 = 0.0, cv_500 = 0.0, cv_975 = 0.0)

for (i in 1:nrow(df_predict)) {
  df_predict$I_025[i] <- quantile(
    l_par$a1*(df_predict$w[i]/w_std)^l_par$b1, probs = 0.025)
  df_predict$I_500[i] <- quantile(
    l_par$a1*(df_predict$w[i]/w_std)^l_par$b1, probs = 0.500)
  df_predict$I_975[i] <- quantile(
    l_par$a1*(df_predict$w[i]/w_std)^l_par$b1, probs = 0.975)
  
  df_predict$cv_025[i] <- quantile(
    l_par$a2*exp(l_par$b2*(df_predict$w[i]-w_std)), probs = 0.025)
  df_predict$cv_500[i] <- quantile(
    l_par$a2*exp(l_par$b2*(df_predict$w[i]-w_std)), probs = 0.500)
  df_predict$cv_975[i] <- quantile(
    l_par$a2*exp(l_par$b2*(df_predict$w[i]-w_std)), probs = 0.975)
}
p1 <- ggplot() +
  geom_point(data = df_fit,
    aes(x = w, y = intake), alpha = 0.2, color = "salmon") +
    geom_ribbon(data = df_predict,
    aes(x = w, ymin = I_025, ymax = I_975), fill = "salmon") +
    geom_line(data = df_predict,
    aes(x = w, y = I_500), color = "darkred") +
    labs(x = "Weight (g)", y = "Intake (g/ind.)") +
    theme_bw()

p2 <- ggplot() +
    geom_ribbon(data = df_predict,
    aes(x = w, ymin = cv_025, ymax = cv_975), fill = "salmon") +
    geom_line(data = df_predict,
    aes(x = w, y = cv_500), color = "darkred") +
 geom_point(data = df_summary,
   aes(x = median_w, y = cov)) +
    labs(x = "Weight (g)", y = "Coefficient of variation") +
    theme_bw()

plot_grid(p1, p2)

ggplot() +
    geom_point(data = df_fit,
    aes(x = w, y = intake, color = w_rel), alpha = 0.5) +
    geom_ribbon(data = df_predict,
    aes(x = w, ymin = I_025, ymax = I_975), fill = "salmon") +
    geom_line(data = df_predict,
    aes(x = w, y = I_500), color = "darkred") +
    labs(x = "Weight (g)", y = "Intake (g/ind.)", 
    color = "Relative\nweight") +
    scale_color_gradient2(low = "red", mid = "white", high = "blue", 
    midpoint = 0) +
    theme_bw() +
    theme(
    panel.background = element_rect(fill = "grey10"),
    panel.grid = element_line(colour = "grey35") 
  )

df_fit$intake_model <- 0.0

for (i in 1:I) {
  df_fit$intake_model[i] <- median(
    l_par$a1*exp(l_par$c1*df_fit$w_rel[i])*(((df_fit$w[i]/w_std)^l_par$b1))
  )
}
ggplot(df_fit) +
    geom_point(aes(x = intake_model, y = intake, color = w_rel)) +
    scale_color_gradient2(low = "red", mid = "white", high = "blue", 
    midpoint = 0) +
    geom_abline(slope = 1, intercept = 0, linetype = "solid",
    color = "gold") + 
    labs(
    x = "Predicted intake (g/ind.)", 
    y = "Observed intake (g/ind.)", 
    color = "Relative\nweight") +
    facet_wrap( ~ factor(days)) +
    theme_bw() +
    theme(
    panel.background = element_rect(fill = "grey10"),
    panel.grid = element_line(colour = "grey35") 
  )

df_predict <- tibble(
  w_rel = seq(from = min(df_fit$w_rel), to = max(df_fit$w_rel), 
    length.out = 100),
    I_rel_025 = 0.0, I_rel_500 = 0.0, I_rel_975 = 0.0)
    
for (i in 1:nrow(df_predict)) {
  df_predict$I_rel_025[i] <- quantile(
    exp(l_par$c1*df_predict$w_rel[i]), probs = 0.025)
  df_predict$I_rel_500[i] <- quantile(
    exp(l_par$c1*df_predict$w_rel[i]), probs = 0.500)
  df_predict$I_rel_975[i] <- quantile(
    exp(l_par$c1*df_predict$w_rel[i]), probs = 0.975)
}

p1 <- ggplot(df_predict) +
    geom_ribbon(aes(x = w_rel, ymin = I_rel_025, ymax = I_rel_975), 
    fill = "salmon") + 
    geom_line(aes(x = w_rel, y = I_rel_500)) + 
    xlim(-1,1) + # ylim(0,NA) + 
    labs(x = "Relative weight", y = "Relative intake") +
    theme_bw()

p2 <- ggplot(df_fit) +
    geom_histogram(aes(x = w_rel), fill = "salmon", color = "darkred") + 
    labs(x = "Relative weight", y = "Frequency") +
    xlim(-1,1) +
    theme_bw()

plot_grid(p1, p2, ncol = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).

l_par <- rstan::extract(fit_n, pars = c("sigma_j", "RE_j"))

df_j <- tibble(j = 1:J)
df_j$delta <-   apply(l_par$RE_j, MARGIN = 2, FUN = median)
df_j <- df_j %>% left_join(select(df_fit, j, w_rel, intake_model), by = "j")

ggplot(df_j) +
    geom_vline(xintercept = 0, linetype = "dashed") +
    geom_hline(yintercept = 0, linetype = "dashed") +
    geom_point(aes(x = delta, y = w_rel)) +
    labs(x = "Individual-level deviation", y = "Relative weight") +
    theme_bw() +
    theme(panel.grid = element_blank())